
library(flextable)
library(officer)
library(dplyr)
library(tidyr)
library(ggplot2)
library(xtable)
library(stargazer)
library(modelsummary)



df <- read.csv("Conspiracy Beliefs Cleaned.csv")


df <- df %>%
  mutate(ResponseID = row_number())

anyDuplicated(df$ResponseID)

df <- df %>%
  filter(!(Acquiescence == 1 | Cheerlead == 1))


## Matching theories & behaviors###

theory_list <- list(
  A2020Stolen     = list(heard = "RepubHeard_11"),
  SandyHook       = list(heard = "RepubHeard_4"),
  QAnon           = list(heard = "RepubHeard_5"),
  A2016Collusion  = list(heard = "DemHeard_4"),
  A9_11           = list(heard = "DemHeard_5"),
  A2024Stolen     = list(heard = "DemHeard_6"),
  UFOs            = list(heard = "NeutralHeard_5"),
  MoonLand        = list(heard = "NeutralHeard_6"),
  Epstein         = list(heard = "NeutralHeard_11")
)

# Behavior items for each theory
behavior_sets <- list(
  A2020Stolen    = c("RepubBehav_1_2", "RepubBehav_2_2", "RepubBehav_3_2", "RepubBehav_4_2"),
  SandyHook      = c("RepubBehav_1_1", "RepubBehav_2_1", "RepubBehav_3_1", "RepubBehav_4_1"),
  QAnon          = c("RepubBehav_1_3", "RepubBehav_2_3", "RepubBehav_3_3", "RepubBehav_4_3"),
  A2016Collusion = c("DemBehav_1_1",   "DemBehav_2_1",   "DemBehav_3_1",   "DemBehav_4_1"),
  A9_11          = c("DemBehav_1_2",   "DemBehav_2_2",   "DemBehav_3_2",   "DemBehav_4_2"),
  A2024Stolen    = c("DemBehav_1_3",   "DemBehav_2_3",   "DemBehav_3_3",   "DemBehav_4_3"),
  UFOs           = c("NeutralBehav__1_2", "NeutralBehav__2_2", "NeutralBehav__3_2", "NeutralBehav__4_2"),
  MoonLand       = c("NeutralBehav__1_3", "NeutralBehav__2_3", "NeutralBehav__3_3", "NeutralBehav__4_3"),
  Epstein        = c("NeutralBehav__1_4", "NeutralBehav__2_4", "NeutralBehav__3_4", "NeutralBehav__4_4")
)

behavior_labels <- c("spoken", "posted", "searched_like_minded", "searched_verification")
theories        <- names(theory_list)
traits          <- c("TrustScale", "ParanoidScale", "OddBeliefsScale")


## Construct belief / familiarity / certainty variables ##


for (theory in theories) {
  heard_var       <- theory_list[[theory]]$heard
  belief_orig_var <- theory  # original 1–5 item
  
  belief_bin_var  <- paste0("belief_binary_",   theory)
  certainty_var   <- paste0("certainty_",       theory)
  fam_var         <- paste0("fam_",             theory)
  certfam_var     <- paste0("certfam_",         theory)  # NEW
  
  # Belief binary: 1 if 4 or 5
  df[[belief_bin_var]] <- ifelse(df[[belief_orig_var]] >= 4, 1, 0)
  
  # Certainty binary: 5
  df[[certainty_var]]  <- ifelse(df[[belief_orig_var]] == 5, 1, 0)
  
  # Familiarity: heard indicator
  df[[fam_var]]        <- df[[heard_var]]
  
  # Certainty + familiarity
  df[[certfam_var]]    <- ifelse(df[[certainty_var]] == 1 & df[[fam_var]] == 1, 1, 0)
}


## TRAIT MODELS ####

trait_level_results <- data.frame()

for (trait in traits) {
  for (theory in theories) {
    
    belief_bin_var <- paste0("belief_binary_", theory)
    certainty_var  <- paste0("certainty_",     theory)
    fam_var        <- paste0("fam_",           theory)
    certfam_var    <- paste0("certfam_",       theory)  
    
    needed <- c(trait, belief_bin_var, certainty_var, fam_var, certfam_var)
    if (!all(needed %in% names(df))) next
    
    sub_df <- data.frame(
      belief_binary    = df[[belief_bin_var]],
      certainty_binary = df[[certainty_var]],
      familiarity      = df[[fam_var]],
      certfam_binary   = df[[certfam_var]],
      Trait            = df[[trait]]
    ) %>%
      tidyr::drop_na()
    
    if (nrow(sub_df) < 10) next
    
    pseudoR2 <- function(model) 1 - model$deviance / model$null.deviance
    
    ## 1) belief_binary ~ Trait
    m1 <- glm(belief_binary ~ Trait, data = sub_df, family = binomial)
    trait_level_results <- rbind(
      trait_level_results,
      data.frame(
        Trait     = trait,
        Theory    = theory,
        Measure   = "belief_binary",
        Coef      = coef(m1)[2],
        p_value   = coef(summary(m1))[2, 4],
        pseudo_R2 = pseudoR2(m1),
        AIC       = AIC(m1)
      )
    )
    
    ## 2) certainty_binary ~ Trait
    m2 <- glm(certainty_binary ~ Trait, data = sub_df, family = binomial)
    trait_level_results <- rbind(
      trait_level_results,
      data.frame(
        Trait     = trait,
        Theory    = theory,
        Measure   = "certainty_binary",
        Coef      = coef(m2)[2],
        p_value   = coef(summary(m2))[2, 4],
        pseudo_R2 = pseudoR2(m2),
        AIC       = AIC(m2)
      )
    )
    
    ## 3) familiarity ~ Trait
    m3 <- glm(familiarity ~ Trait, data = sub_df, family = binomial)
    trait_level_results <- rbind(
      trait_level_results,
      data.frame(
        Trait     = trait,
        Theory    = theory,
        Measure   = "familiarity_binary",
        Coef      = coef(m3)[2],
        p_value   = coef(summary(m3))[2, 4],
        pseudo_R2 = pseudoR2(m3),
        AIC       = AIC(m3)
      )
    )
    
    ## 4) certfam_binary ~ Trait  
    m4 <- glm(certfam_binary ~ Trait, data = sub_df, family = binomial)
    trait_level_results <- rbind(
      trait_level_results,
      data.frame(
        Trait     = trait,
        Theory    = theory,
        Measure   = "certfam_binary",
        Coef      = coef(m4)[2],
        p_value   = coef(summary(m4))[2, 4],
        pseudo_R2 = pseudoR2(m4),
        AIC       = AIC(m4)
      )
    )
  }
}


trait_summary <- trait_level_results %>%
  group_by(Trait, Measure) %>%
  summarise(
    Mean_Coef = round(mean(Coef,      na.rm = TRUE), 3),
    Mean_p    = round(mean(p_value,   na.rm = TRUE), 3),
    Mean_R2   = round(mean(pseudo_R2, na.rm = TRUE), 3),
    Mean_AIC  = round(mean(AIC,       na.rm = TRUE), 2),
    sig_count = sum(p_value < 0.05,   na.rm = TRUE),
    .groups   = "drop"
  )



  stargazer(
    trait_summary,
    summary  = FALSE,
    rownames = FALSE,
    type     = "latex",
    out      = "trait_summary.txt"
  )
  


  ft <- flextable(trait_summary)
  ft <- autofit(ft)
  
  # Export to Word
  doc <- read_docx()
  doc <- body_add_flextable(doc, ft)
  print(doc, target = "trait_summary_table.docx")
  



##Trait Visualized##
  library(dplyr)
  library(ggplot2)
  
  trait_labels <- c(
    "TrustScale"      = "Trust",
    "ParanoidScale"   = "Paranoia",
    "OddBeliefsScale" = "Odd beliefs"
  )
  

  
  main_trait_R2 <- trait_summary %>%
    filter(Measure %in% c("belief_binary", "certfam_binary")) %>%
    mutate(
      Trait_pretty = factor(trait_labels[Trait],
                            levels = c("Trust", "Paranoia", "Odd beliefs")),
      Measure_pretty = recode(
        Measure,
        "belief_binary"  = "Agreement",
        "certfam_binary" = "Certainty + familiarity"
      )
    )
  
  
  
  # grayscale palette for the two models
  trait_fill_bw <- c(
    "Agreement"               = "#7F7F7F",  # medium gray
    "Certainty + familiarity" = "#000000"   # black
  )
  
  ggplot(main_trait_R2,
         aes(x = Measure_pretty, y = Mean_R2, fill = Measure_pretty)) +
    geom_col(width = 0.6) +
    facet_wrap(~ Trait_pretty, nrow = 1) +
    labs(
      x = "",
      y = "Mean pseudo-R² (across theories)",
      fill = "",
      title = "Variance in belief measures explained by traits:\nAgreement vs certainty + familiarity"
    ) +
    scale_fill_manual(values = trait_fill_bw) +     # <<< put the B&W scale back
    theme_bw() +
    theme(
      panel.grid.major = element_line(color = "gray80", size = 0.4),
      panel.grid.minor = element_line(color = "gray90", size = 0.2),
      axis.text.x      = element_text(angle = 30, hjust = 1),
      legend.position  = "NA",
      plot.title       = element_text(face = "bold")
    )
  

  

##MODEL TRAIT COMPARISONS##
  library(dplyr)
  library(ggplot2)
  
  dot_palette <- c(
    "Agreement"                = "#B2B2B2",  # light gray
    "Certainty + familiarity" = "#000000"   # black
  )
  
  
  appendix_trait_plot <- trait_summary %>%
    filter(Measure %in% c("belief_binary", "certfam_binary")) %>%
    mutate(
      Trait_pretty = factor(trait_labels[Trait],
                            levels = c("Trust", "Paranoia", "Odd beliefs")),
      Measure_pretty = recode(
        Measure,
        "belief_binary"  = "Agreement",
        "certfam_binary" = "Certainty + familiarity"
      )
    )

  dot_palette_bw <- c(
    "Agreement"               = "#7F7F7F",  # medium gray
    "Certainty + familiarity" = "#000000"   # black
  )
  
  ggplot(appendix_trait_plot,
         aes(x = Mean_Coef, y = Trait_pretty, color = Measure_pretty)) +
    # vertical line
    geom_vline(xintercept = 0, linetype = "dashed", color = "gray40") +
    
    geom_point(size = 3) +
    
    labs(
      x = "Average logit coefficient",
      y = "",
      title = "Trait associations for agreement vs. certainty + familiarity",
      color = "Model"
    ) +
    scale_color_manual(values = dot_palette_bw) +
    
    #MORE GRIDLINES TO MATCH
    theme_bw(base_size = 12) +
    theme(
      panel.grid.major = element_line(color = "gray80", size = 0.4),
      panel.grid.minor = element_line(color = "gray90", size = 0.2),
      legend.position  = "bottom",
      plot.title       = element_text(face = "bold"),
      axis.title.x     = element_text(size = 12)
    )
  
  
  unique(trait_summary$Measure) 
  
  
  
  ####ON ONE GRAPH######
  
  trait_plot <- trait_summary %>%
    # only keep Agreement and Certainty + familiarity models
    filter(Measure %in% c("belief_binary", "certfam_binary")) %>%
    mutate(
      Trait_pretty = factor(trait_labels[Trait],
                            levels = c("Trust", "Paranoia", "Odd beliefs")),
      Measure_pretty = recode(
        Measure,
        "belief_binary"  = "Agreement",
        "certfam_binary" = "Certainty + familiarity"
      )
    )
  
  ggplot(trait_plot,
         aes(x = Mean_Coef, y = Trait_pretty, color = Measure_pretty)) +
    geom_vline(xintercept = 0, linetype = "dashed") +
    geom_point(size = 2) +
    labs(
      x = "Average logit coefficient",
      y = "",
      color = "Model",
      title = "Trait associations for agreement vs. certainty + familiarity"
    ) +
    theme_bw()

  
  
  ## BEHAVIOR MODELS ####
  
  results_behavior_metrics <- data.frame()
  
  pseudoR2 <- function(model) 1 - model$deviance / model$null.deviance
  
  for (theory in theories) {
    belief_bin_var <- paste0("belief_binary_", theory)
    certainty_var  <- paste0("certainty_",     theory)
    fam_var        <- paste0("fam_",           theory)
    salience_var   <- paste0("certfam_",       theory)  # 1 if certain AND familiar
    behavior_vars  <- behavior_sets[[theory]]
    
    # now require salience_var as well
    needed <- c(belief_bin_var, certainty_var, fam_var, salience_var, behavior_vars)
    if (!all(needed %in% names(df))) next
    
    for (i in seq_along(behavior_vars)) {
      behavior       <- behavior_vars[i]
      behavior_label <- behavior_labels[i]
      
      temp_df <- data.frame(
        belief_binary    = df[[belief_bin_var]],
        certainty_binary = df[[certainty_var]],
        familiarity      = df[[fam_var]],
        salience_binary  = df[[salience_var]],  # <-- NEW COLUMN
        behavior         = df[[behavior]]
      ) %>%
        tidyr::drop_na()
      
      if (nrow(temp_df) < 10) next
      
      # existing models
      m1 <- glm(behavior ~ belief_binary,               data = temp_df, family = binomial)
      m2 <- glm(behavior ~ certainty_binary,            data = temp_df, family = binomial)
      m3 <- glm(behavior ~ familiarity,                 data = temp_df, family = binomial)
      m4 <- glm(behavior ~ certainty_binary + familiarity,
                data = temp_df, family = binomial)
      
      # salience as a single predictor
      m5 <- glm(behavior ~ salience_binary,             data = temp_df, family = binomial)
      
      results_behavior_metrics <- rbind(
        results_behavior_metrics,
        data.frame(theory = theory, behavior = behavior_label, model = "Belief Only",
                   AIC = AIC(m1), R2 = pseudoR2(m1)),
        data.frame(theory = theory, behavior = behavior_label, model = "Certainty Only",
                   AIC = AIC(m2), R2 = pseudoR2(m2)),
        data.frame(theory = theory, behavior = behavior_label, model = "Familiarity Only",
                   AIC = AIC(m3), R2 = pseudoR2(m3)),
        data.frame(theory = theory, behavior = behavior_label, model = "Cert + Fam (2 predictors)",
                   AIC = AIC(m4), R2 = pseudoR2(m4)),
        data.frame(theory = theory, behavior = behavior_label, model = "Salience Only",
                   AIC = AIC(m5), R2 = pseudoR2(m5))  # <-- NEW ROW
      )
    }
  }
  
  # Aggregate by behavior type
  results_behavior_metrics_clean <- results_behavior_metrics %>%
    distinct(theory, behavior, model, AIC, R2)
  
  summary_behavior_fit <- results_behavior_metrics_clean %>%
    group_by(behavior, model) %>%
    summarise(
      Mean_AIC   = mean(AIC, na.rm = TRUE),
      Mean_R2    = mean(R2,  na.rm = TRUE),
      n_theories = n(),
      .groups    = "drop"
    ) %>%
    arrange(behavior, Mean_AIC)
  
  print(summary_behavior_fit, n = 50)
  

stargazer(
  summary_behavior_fit,
  summary  = FALSE,
  rownames = FALSE,
  type     = "latex",
  out      = "trait_summary.txt"
)




ft <- flextable(summary_behavior_fit)
ft <- autofit(ft)

# Export to Word
doc <- read_docx()
doc <- body_add_flextable(doc, ft)
print(doc, target = "behavior_table.docx")





##Model fit for behavioral outcomes ##

plot_data_fit <- summary_behavior_fit %>%
  filter(model %in% c("Belief Only",
                      "Certainty Only",
                      "Familiarity Only",
                      "Cert + Fam (2 predictors)")) %>%
  mutate(
    Model_pretty = dplyr::recode(
      model,
      "Belief Only"               = "Agreement",
      "Certainty Only"            = "Certainty",
      "Familiarity Only"          = "Familiarity",
      "Cert + Fam (2 predictors)" = "Certainty + familiarity"
    ),
    Model_pretty = factor(
      Model_pretty,
      levels = c("Agreement", "Certainty", "Familiarity", "Certainty + familiarity")
    ),
    Behavior_pretty = dplyr::recode(
      behavior,
      "spoken"                 = "Spoken",
      "posted"                 = "Posted",
      "searched_like_minded"   = "Like-minded search",
      "searched_verification"  = "Verification search"
    )
  )

model_palette_bw <- c(
  "Agreement"               = "#4D4D4D",  # dark gray
  "Certainty"               = "#808080",  # medium gray
  "Familiarity"             = "#BFBFBF",  # light gray
  "Certainty + familiarity" = "#000000"   # black
)

ggplot(plot_data_fit,
       aes(x = Model_pretty, y = Mean_R2, fill = Model_pretty)) +
  geom_col(width = 0.7) +
  facet_wrap(~ Behavior_pretty, scales = "free_y") +
  labs(
    x = "Measure used as predictor",
    y = "Mean pseudo-R²",
    title = "Model fit for behavioral outcomes by belief measure",
    fill = "Model"
  ) +
  scale_fill_manual(values = model_palette_bw) +
  theme_bw(base_size = 12) +
  theme(
    panel.grid.major = element_line(color = "gray80", size = 0.4),
    panel.grid.minor = element_line(color = "gray90", size = 0.2),
    axis.text.x      = element_text(angle = 30, hjust = 1),
    legend.position  = "bottom",
    plot.title       = element_text(face = "bold")
  )


###FOR JUST THE TWO MODELS###

behavior_palette_two <- c(
  "Agreement"               = "#7F7F7F",  # medium gray
  "Certainty + familiarity" = "#000000"   # black
)


plot_data_two <- summary_behavior_fit %>%
  filter(model %in% c("Belief Only",
                      "Cert + Fam (2 predictors)")) %>%
  mutate(
    Model_pretty = dplyr::recode(
      model,
      "Belief Only"               = "Agreement",
      "Cert + Fam (2 predictors)" = "Certainty + familiarity"
    ),
    Model_pretty = factor(
      Model_pretty,
      levels = c("Agreement", "Certainty + familiarity")
    ),
    Behavior_pretty = dplyr::recode(
      behavior,
      "spoken"                 = "Spoken",
      "posted"                 = "Posted",
      "searched_like_minded"   = "Like-minded search",
      "searched_verification"  = "Verification search"
    )
  )

ggplot(plot_data_two,
       aes(x = Model_pretty, y = Mean_R2, fill = Model_pretty)) +
  geom_col(width = 0.7) +
  facet_wrap(~ Behavior_pretty, scales = "free_y") +
  labs(
    x = "Measure used as predictor",
    y = "Mean pseudo-R² (across theories)",
    title = "Behavioral model performance: Agreement vs. certainty + familiarity",
    fill = "Model"
  ) +
  scale_fill_manual(values = behavior_palette_two) +
  theme_bw(base_size = 12) +
  theme(
    panel.grid.major = element_line(color = "gray80", size = 0.4),
    panel.grid.minor = element_line(color = "gray90", size = 0.2),
    axis.text.x      = element_text(angle = 30, hjust = 1),
    legend.position  = "NA",
    strip.text       = element_text(size = 12, face = "bold"),
    plot.title       = element_text(face = "bold")
  )






### Behavior by belief status ##

df$respondent_id <- seq_len(nrow(df))


library(dplyr)
library(tidyr)
library(ggplot2)

# Build long data: one row per respondent × theory 
df_long <- data.frame()

for (theory in names(behavior_sets)) {
  belief_bin_var <- paste0("belief_binary_", theory)
  certainty_var  <- paste0("certainty_",     theory)
  fam_var        <- paste0("fam_",           theory)
  beh_vars       <- behavior_sets[[theory]]  # 4 behavior columns
  
  needed  <- c(belief_bin_var, certainty_var, fam_var, beh_vars)
  missing <- setdiff(needed, names(df))
  if (length(missing) > 0) {
    message("Skipping theory ", theory, " (missing: ",
            paste(missing, collapse = ", "), ")")
    next
  }
  
  tmp <- df[, needed]
  names(tmp) <- c("belief_binary", "certainty_binary", "familiarity_binary",
                  "spoken", "posted", "searched_like_minded", "searched_verification")
  
  tmp <- tmp %>%
    mutate(
      theory = theory,
      # three mutually exclusive belief categories
      group = dplyr::case_when(
        belief_binary == 0 ~ "Doesn't agree",
        belief_binary == 1 & certainty_binary == 0 ~ "Agreement",
        belief_binary == 1 & certainty_binary == 1 & familiarity_binary == 1 ~ "Certainty + familiar",
        TRUE ~ NA_character_
      )
    )
  
  df_long <- dplyr::bind_rows(df_long, tmp)
}

# drop any rows that didn't fall into one of the three groups
df_long <- df_long %>% filter(!is.na(group))

# Reshape behaviors to long format
df_behav <- df_long %>%
  tidyr::pivot_longer(
    cols = c(spoken, posted, searched_like_minded, searched_verification),
    names_to = "behavior_type",
    values_to = "behavior"
  )

# Compute mean behavior rate by belief group × behavior type
plot_data_rates <- df_behav %>%
  group_by(behavior_type, group) %>%
  summarise(
    mean_rate = mean(behavior, na.rm = TRUE),
    n         = n(),
    .groups   = "drop"
  ) %>%
  mutate(
    # Clean up belief category labels
    group = dplyr::recode(
      group,
      "Doesn't agree"        = "Doesn’t agree",
      "Agreement"            = "Agrees",
      "Certainty + familiar" = "Certain & familiar"
    ),
    group = factor(group,
                   levels = c("Doesn’t agree", "Agrees", "Certain & familiar")),
    
    # Clean up behavior labels for facets
    behavior_type = dplyr::recode(
      behavior_type,
      "spoken"                = "Spoken",
      "posted"                = "Posted",
      "searched_like_minded"  = "Like-minded search",
      "searched_verification" = "Verification search"
    ),
    behavior_type = factor(
      behavior_type,
      levels = c("Spoken", "Posted", "Like-minded search", "Verification search")
    )
  )



# Plot


group_palette_bw <- c(
  "Doesn't agree"       = "#D0D0D0",
  "Agrees"              = "#7F7F7F",
  "Certain & familiar"  = "#000000"
)


ggplot(plot_data_rates,
       aes(x = group, y = mean_rate, fill = group)) +
  geom_col(width = 0.7) +
  facet_wrap(~ behavior_type, scales = "free_y") +
  labs(
    x = "Belief category",
    y = "Mean probability of behavior",
    title = "Behavior rates by belief category",
    fill = "Belief type"
  ) +
  scale_fill_manual(values = group_palette_bw) +
  theme_bw(base_size = 12) +
  theme(
    panel.grid.major = element_line(color = "gray80", size = 0.4),
    panel.grid.minor = element_line(color = "gray90", size = 0.2),
    axis.text.x      = element_text(angle = 25, hjust = 1),
    legend.position  = "NA",
    strip.text       = element_text(size = 12, face = "bold"),
    plot.title       = element_text(face = "bold")
  )



### ADDITIONAL SENSITIVITY CHECKS ### APPENDIX #####

pseudoR2 <- function(model) 1 - model$deviance / model$null.deviance

# --------------------------
# sensitivity vars 
# --------------------------
for (theory in theories) {
  belief_orig_var <- theory
  heard_var       <- theory_list[[theory]]$heard
  
  certainty_ord <- paste0("certainty_ord_", theory)
  fam_var       <- paste0("fam_",          theory)
  
  salience5_var <- paste0("salience5_",    theory)
  salience4_var <- paste0("salience4_",    theory)
  
  # certainty_ord (1–5)
  if (!certainty_ord %in% names(df)) {
    df[[certainty_ord]] <- df[[belief_orig_var]]
  }
  
  # fam_*
  if (!fam_var %in% names(df)) {
    df[[fam_var]] <- df[[heard_var]]
  }
  
  # salience5 / salience4
  if (!salience5_var %in% names(df)) {
    df[[salience5_var]] <- ifelse(df[[certainty_ord]] == 5 & df[[fam_var]] == 1, 1, 0)
  }
  if (!salience4_var %in% names(df)) {
    df[[salience4_var]] <- ifelse(df[[certainty_ord]] >= 4 & df[[fam_var]] == 1, 1, 0)
  }
}


appendixF_behavior_metrics <- data.frame()

for (theory in theories) {
  belief_bin_var <- paste0("belief_binary_", theory)
  certainty_bin  <- paste0("certainty_",     theory)
  certainty_ord  <- paste0("certainty_ord_", theory)
  fam_var        <- paste0("fam_",           theory)
  
  behavior_vars  <- behavior_sets[[theory]]
  needed <- c(belief_bin_var, certainty_bin, certainty_ord, fam_var, behavior_vars)
  if (!all(needed %in% names(df))) next
  
  for (i in seq_along(behavior_vars)) {
    behavior       <- behavior_vars[i]
    behavior_label <- behavior_labels[i]
    
    tmp <- data.frame(
      belief_binary    = df[[belief_bin_var]],
      certainty_binary = df[[certainty_bin]],
      certainty_ord    = df[[certainty_ord]],
      familiarity      = df[[fam_var]],
      behavior         = df[[behavior]]
    ) %>%
      tidyr::drop_na()
    
    if (nrow(tmp) < 25) next
    
    # Main specs 
    m_agree   <- glm(behavior ~ belief_binary, data = tmp, family = binomial)
    m_certbin <- glm(behavior ~ certainty_binary, data = tmp, family = binomial)
    m_fam     <- glm(behavior ~ familiarity, data = tmp, family = binomial)
    m_joint   <- glm(behavior ~ certainty_binary + familiarity, data = tmp, family = binomial)
    
    # Sensitivity: ordinal certainty
    m_certord <- glm(behavior ~ certainty_ord, data = tmp, family = binomial)
    m_joint_o <- glm(behavior ~ certainty_ord + familiarity, data = tmp, family = binomial)
    
    appendixF_behavior_metrics <- rbind(
      appendixF_behavior_metrics,
      data.frame(theory = theory, behavior = behavior_label, model = "Agreement",
                 AIC = AIC(m_agree), R2 = pseudoR2(m_agree)),
      data.frame(theory = theory, behavior = behavior_label, model = "Certainty (binary)",
                 AIC = AIC(m_certbin), R2 = pseudoR2(m_certbin)),
      data.frame(theory = theory, behavior = behavior_label, model = "Familiarity",
                 AIC = AIC(m_fam), R2 = pseudoR2(m_fam)),
      data.frame(theory = theory, behavior = behavior_label, model = "Certainty (binary) + Familiarity",
                 AIC = AIC(m_joint), R2 = pseudoR2(m_joint)),
      data.frame(theory = theory, behavior = behavior_label, model = "Certainty (ordinal)",
                 AIC = AIC(m_certord), R2 = pseudoR2(m_certord)),
      data.frame(theory = theory, behavior = behavior_label, model = "Certainty (ordinal) + Familiarity",
                 AIC = AIC(m_joint_o), R2 = pseudoR2(m_joint_o))
    )
  }
}

appendixF_behavior_metrics <- appendixF_behavior_metrics %>%
  distinct(theory, behavior, model, AIC, R2)

appendixF_behavior_fit <- appendixF_behavior_metrics %>%
  group_by(behavior, model) %>%
  summarise(
    Mean_AIC = mean(AIC, na.rm = TRUE),
    Mean_R2  = mean(R2,  na.rm = TRUE),
    .groups  = "drop"
  ) %>%
  mutate(
    behavior = recode(
      behavior,
      "spoken"                = "Spoken",
      "posted"                = "Posted",
      "searched_like_minded"  = "Like-minded search",
      "searched_verification" = "Verification search"
    )
  ) %>%
  arrange(behavior, Mean_AIC)


ft1 <- flextable(appendixF_behavior_fit) %>% autofit()
doc1 <- read_docx() %>% body_add_par("Appendix F.1: Ordinal certainty sensitivity (behavior models)", style = "heading 1") %>%
  body_add_flextable(ft1)
print(doc1, target = "AppendixF_behavior_sensitivity_fit.docx")



